package Statistics;

use strict;

# Count the keys stored in the DBM database at $path (the name is given
# without any backend-specific file extension).
# Dies with the path and the system error if the database cannot be opened.
sub _count_dbm_keys {
  my ($path) = @_;

  my %dbm;
  # A mode of 0 tells dbmopen not to create the database when it is missing.
  # With a non-zero mode a missing index would be created empty as a side
  # effect of merely asking for statistics, and reported as having 0 entries.
  dbmopen(%dbm, $path, 0)
    or die "Could not open dbm file '$path': $!";
  my $count = scalar(keys(%dbm));
  dbmclose(%dbm);

  return $count;
}

# Gather statistics about the cranfield corpus index.
#
# Takes no arguments.  Returns an array of three elements, in this order:
#   0: number of keys in the term-count database
#   1: number of keys in the docid-to-url database
#   2: size of the term-frequency index directory as printed by `du -s`
#      (units are whatever the local `du` uses by default); an empty string
#      when `du` produced no leading number
#
# Dies if either DBM database cannot be opened.  All paths are relative to
# the current working directory.
sub stat_of_index {
  # the paths to dbm files storing index and statistics
  my $dbm_path = 'produced/corpus-data/cranfieldcorpus';
  my $tcdbm = 'cranfieldcorpus-tc-s';
  my $idurldbm = 'cranfieldcorpus-docid-to-url';
  my @stat = ();

  # read term counts
  push @stat, _count_dbm_keys("$dbm_path/$tcdbm");

  # read number of docs
  push @stat, _count_dbm_keys("$dbm_path/$idurldbm");

  # find the size of index
  my $du_output = `du -s produced/corpus-data/cranfieldcorpus-tf-s/`;
  # `du -s` prints "<size><whitespace><path>"; keep only the leading number.
  my $size = '';
  if (defined $du_output && $du_output =~ /\A\s*([0-9]+)/) {
    $size = $1;
  }
  push @stat, $size;

  # Returned as an array (not a list literal) so that a caller in scalar
  # context keeps receiving the element count.
  return @stat;
}

1;

